
**********
* Readme *
**********

* This script replicates Tables 2 and 3 (as Tables 9 and 10) with domestic workers taken out of the own-account workers group.


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
* Start from an empty session, then store the location of the replication
* package in the global analysis; every path used below is built from it.
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
* The global today holds the run date formatted as YYMMDD and is embedded in
* the log file name.
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")

* clear all does not close log files. Close any log left open by an earlier
* run that stopped before reaching the final log close, otherwise log using
* aborts with a "log file already open" error.
capture log close
log using "${analysis}/code/logs/3_4_descriptive_labor_stats_appx_dom_${today}.smcl", replace


*******************************
* Table 9: Who are the OAWs ? *
*******************************

* Load the cleaned PNAD file
use "${analysis}/data/2_4_pnad_clean.dta", clear

* Retain the survey-design variables, the work-state identifiers, the
* occupation code and the attribute variables
keep strata_id psu_id pweight           ///
     work_state_dom work_state_dom_name ///
     occupation formal                  ///
     rg_* educ_* age_* wp_*

* Declare the survey design: PSUs, sampling weights and strata
svyset psu_id [pweight = pweight], singleunit(centered) strata(strata_id)

* Variables reported in the table, assembled group by group
global variables rg_1_nonwhite_female rg_2_white_female rg_3_nonwhite_male rg_4_white_male
global variables $variables educ_1 educ_2 educ_3 educ_4
global variables $variables age_1 age_2 age_3 age_4 age_5
global variables $variables formal
global variables $variables wp_1 wp_2 wp_3 wp_4 wp_5 wp_6 wp_7 wp_8

* Snapshot of the trimmed data, reloaded at the start of each table
tempfile full_data
save `full_data', replace


* Attributes
************

use `full_data', clear

* Population: constant that is summed with survey weights further below
* to obtain subpopulation sizes
gen pop = 1/(10^6) 	// population in millions
replace pop = pop/8	// average over 8 quarters

* Percentages: multiply every variable of the table by 100
foreach attr of global variables {
	replace `attr' = `attr' * 100
}


* Statistics by work state (domestic worker, OAW, employee)
***********************************************************

* Value of work_state_dom_name that selects each subpopulation
local state_dom "Domestic worker"
local state_oaw "Own-account worker"
local state_ee  "Employee"

* Row name given to each group in the combined matrix
local row_dom "Domestic worker"
local row_oaw "OAW"
local row_ee  "Employee"

foreach g in dom oaw ee {

	* Subpopulation size: survey-weighted total of pop
	svy, subpop(if work_state_dom_name == "`state_`g''"): total pop
	matrix `g'_n = r(table)

	* Survey-weighted means of the table variables
	svy, subpop(if work_state_dom_name == "`state_`g''"): mean $variables
	matrix `g'_stats = r(table)

	* Keep the point estimates only (row b), size first and means after
	matrix `g' = `g'_n["b",1...], `g'_stats["b",1...]
	matrix rownames `g' = "`row_`g''"
}


* Combined results
******************

* Stack the three groups, one row per group
matrix dom_oaw_ee = dom\oaw\ee
matrix list dom_oaw_ee


* Dataset with results
**********************

* Replace the data in memory by the combined matrix, one variable per
* matrix column, named after the column
clear
svmat dom_oaw_ee, names(col)

* Convert every value to text with one decimal, then transpose with sxpose2
tostring _all, replace force format(%9.1f)
sxpose2, clear force varname

* Give the variables produced by sxpose2 their final names
rename (_varname _var1 _var2 _var3) (attribute dom oaw ee)


* Cleaning output
*****************
* Each variable name stored in attribute is replaced by the LaTeX text of
* its table row. The first row of each group also carries the group heading.

* Subpopulation size
replace attribute = "\textit{Subpopulation size (in millions)}" ///
	if attribute == "pop"

* Ethnicity and gender
replace attribute = "\tabularnewline \textit{Ethnicity and gender (in \%)} \\ \hspace{2ex} Nonwhite female" ///
	if attribute == "rg_1_nonwhite_female"
replace attribute = "\hspace{2ex} White female" ///
	if attribute == "rg_2_white_female"
replace attribute = "\hspace{2ex} Nonwhite male" ///
	if attribute == "rg_3_nonwhite_male"
replace attribute = "\hspace{2ex} White male" ///
	if attribute == "rg_4_white_male"

* Education level
replace attribute = "\tabularnewline \textit{Education level (in \%)} \\ \hspace{2ex} Less than prim. school" ///
	if attribute == "educ_1"
replace attribute = "\hspace{2ex} Primary school" ///
	if attribute == "educ_2"
replace attribute = "\hspace{2ex} High school" ///
	if attribute == "educ_3"
replace attribute = "\hspace{2ex} College or above" ///
	if attribute == "educ_4"

* Age group
replace attribute = "\tabularnewline \textit{Age group (in \%)} \\ \hspace{2ex} Age 14-24" ///
	if attribute == "age_1"
replace attribute = "\hspace{2ex} Age 25-34" ///
	if attribute == "age_2"
replace attribute = "\hspace{2ex} Age 35-44" ///
	if attribute == "age_3"
replace attribute = "\hspace{2ex} Age 45-54" ///
	if attribute == "age_4"
replace attribute = "\hspace{2ex} Age 55-64" ///
	if attribute == "age_5"

* Formal work status
replace attribute = "\tabularnewline \textit{Formal work status (in \%)}" ///
	if attribute == "formal"

* Usual workplace
replace attribute = "\tabularnewline \textit{Usual workplace (in \%)} \\ \hspace{2ex} Dedicated store, office" ///
	if attribute == "wp_1"
replace attribute = "\hspace{2ex} Place chosen by client, employer" ///
	if attribute == "wp_2"
replace attribute = "\hspace{2ex} Client's, employer's home" ///
	if attribute == "wp_3"
replace attribute = "\hspace{2ex} Worker's home (dedicated area)" ///
	if attribute == "wp_4"
replace attribute = "\hspace{2ex} Worker's home (shared area)" ///
	if attribute == "wp_5"
replace attribute = "\hspace{2ex} Worker's vehicle" ///
	if attribute == "wp_6"
replace attribute = "\hspace{2ex} Public space" ///
	if attribute == "wp_7"
replace attribute = "\hspace{2ex} Other places" ///
	if attribute == "wp_8"

* Show the finished table in the log
list, separator(50)


* TABLE 9
*********

* Write the rows only (dataonly) to the LaTeX file
texsave _all using "${analysis}/results/tables/table9.tex", dataonly replace nofix noendash


**************************************
* Table 10: What are the OAWs doing? *
**************************************

use `full_data', clear


* Occupation
************

* Following ISCO-08 https://en.wikipedia.org/wiki/International_Standard_Classification_of_Occupations

* Add prefix "10" to military occupations to avoid unintended merge with other groups
replace occupation = 10110 if occupation == 110
replace occupation = 10210 if occupation == 210
replace occupation = 10411 if occupation == 411
replace occupation = 10412 if occupation == 412
replace occupation = 10511 if occupation == 511
replace occupation = 10512 if occupation == 512

* ISCO level 4 (with a divisor of 1 the code is kept unchanged)
gen occupation_4 = floor(occupation/1)*1

* Define the standard ISCO labels
do "${analysis}/code/isco_labels.do" 
 
* Label the values
label values occupation_4 isco08_lbl

* Summary by work state: weighted head count per work state and occupation
keep if inlist(work_state_dom_name, "Own-account worker", "Employee", "Domestic worker")
gen freq = pweight/8 // average over 8 quarters, each representing the whole population
gcollapse (sum) freq, by(work_state_dom_name occupation_4)
replace freq = freq/1000

* Share of each occupation within its work state, in percent.
* bysort is used so that the step does not depend on the sort order left
* behind by gcollapse.
bysort work_state_dom_name: egen tot = total(freq)
gen prop = 100*freq/tot

* Order occupations from most to least common within each work state.
* The occupation code breaks ties, so that the top-10 selection made further
* below is reproducible from run to run.
gsort work_state_dom_name -prop occupation_4
drop  freq tot

* Text version of the occupation label and of the share, e.g. (12.3)
decode occupation_4, generate(occupation_str)
tostring prop, generate(prop_str) format(%7.1f) force
generate prop_str_par = "(" + prop_str +")"

* One text column per work state. All three columns use the share in
* parentheses so that the columns of the table are formatted alike
* (the dom column previously used the share without parentheses).
generate oaw = occupation_str + ": " + prop_str_par if work_state_dom_name == "Own-account worker"
generate ee = occupation_str + ": " + prop_str_par if work_state_dom_name == "Employee"
generate dom = occupation_str + ": " + prop_str_par if work_state_dom_name == "Domestic worker"
drop work_state_dom_name prop occupation_* prop_str prop_str_par

* Ranked list of all occupations, reused for each work state
tempfile all_occupations
save `all_occupations', replace


* Most common occupations for employees and for OAWs
****************************************************
* For each group: keep its rows and its column, cut at the first ten rows
* (the file is already ordered from most to least common), number them and
* store the result in a tempfile named after the group.

local columns oaw ee dom

foreach grp in ee oaw {
	use `all_occupations', clear
	keep if `grp' != ""

	local others : list columns - grp
	drop `others'

	keep if _n <= 10
	gen rank = _n
	order rank

	tempfile `grp'_occupations
	save ``grp'_occupations', replace
}


* Most common occupations for domestic workers
**********************************************

use `all_occupations', clear
keep if dom != ""
drop oaw ee
keep if _n <= 10
gen rank = _n
order rank

* Add the OAW and employee columns next to the domestic-worker column
foreach grp in oaw ee {
	merge 1:1 rank using ``grp'_occupations', nogenerate force
}

* Ordinal labels: 1st, 2nd, 3rd, then 4th to 10th
tostring rank, replace
replace rank = rank + cond(rank == "1", "st", cond(rank == "2", "nd", cond(rank == "3", "rd", "th")))

list, separator(50)


* TABLE 10
**********

texsave _all using "${analysis}/results/tables/table10.tex", dataonly replace nofix noendash


* End of script
***************
capture log close